require(igraph)
require(tidyverse)

# General edge list; its `source`, `target` and `platform` columns are used
# further down in this script.
NU_general_edges <- read.csv(
  "data/processed/pwdprotect_NU_general_edges.csv"
)

# Facebook profiles; only `profile_name` is read here.
facebook_profiles <- read.csv(
  "data/raw/pwdprotect_facebook_profiles.csv"
)

# Local MeetUp edges. The two ID columns are cast to character before the
# edge lists are stacked (presumably so their types agree with the general
# edge list -- confirm against the CSVs).
NU_local_MeetUp_edges <- dplyr::mutate(
  read.csv("data/processed/pwdprotect_NU_local_MeetUp_edges.csv"),
  source_ID = as.character(source_ID),
  target_ID = as.character(target_ID)
)

# From here on NU_general_edges holds the general and the MeetUp edges.
NU_general_edges <- dplyr::bind_rows(NU_general_edges, NU_local_MeetUp_edges)

# First name of each profile: everything from the first run of lower-case
# letters onwards is replaced by that run alone (characters in front of the
# run are kept), and the result is lower-cased.
facebook_profiles$first_name <- tolower(
  gsub("([a-z]+).*", "\\1", facebook_profiles$profile_name)
)

# Frequency table of first names, restricted to names seen on at least five
# profiles. It serves as the list of plausible person names below.
first_names <-
  facebook_profiles %>%
  dplyr::group_by(first_name) %>%
  dplyr::count() %>%
  dplyr::filter(n >= 5)

## test 1: both names contain nothing but letters and spaces

NU_general_edges <- dplyr::filter(
  NU_general_edges,
  !grepl("[^[:alpha:] ]", source),
  !grepl("[^[:alpha:] ]", target)
)

## test 2: the first name of both endpoints is a known person name

# The first name is extracted with the same pattern used for the Facebook
# profiles, but without lower-casing.
# NOTE(review): first_names$first_name is lower-cased, so this comparison only
# matches if `source` / `target` are already lower case -- confirm.
NU_general_edges <-
  NU_general_edges %>%
  dplyr::mutate(
    source_first_name = gsub("([a-z]+).*", "\\1", source),
    target_first_name = gsub("([a-z]+).*", "\\1", target)
  ) %>%
  dplyr::filter(
    source_first_name %in% first_names$first_name,
    target_first_name %in% first_names$first_name
  )

## test 3: both names have more than one whitespace-separated token
## (i.e. a first name and a surname)
require(stringr)

NU_general_edges <- dplyr::filter(
  NU_general_edges,
  stringr::str_count(source, "\\S+") > 1,
  stringr::str_count(target, "\\S+") > 1
)


require(igraph)

# Turn a set of edge rows into an igraph graph. Only columns 1, 2 and 5 of the
# edge table are handed to igraph: the first two are the edge endpoints, and
# column 5 is assumed to be `platform` (it is read back as an edge attribute
# later in the script) -- TODO confirm the column order of the CSV.
edges_to_graph <- function(edges) {
  igraph::graph_from_data_frame(edges[, c(1, 2, 5)])
}

# All platforms together.
NU.g <- edges_to_graph(NU_general_edges)

# Every edge except the meetup ones.
NU_without_meetup.g <-
  edges_to_graph(NU_general_edges[NU_general_edges$platform != "meetup", ])

# One graph per platform.
NU_blog.g <-
  edges_to_graph(NU_general_edges[NU_general_edges$platform == "blog", ])

NU_forum.g <-
  edges_to_graph(NU_general_edges[NU_general_edges$platform == "forum", ])

NU_facebook.g <-
  edges_to_graph(NU_general_edges[NU_general_edges$platform == "facebook", ])

NU_meetup.g <-
  edges_to_graph(NU_general_edges[NU_general_edges$platform == "meetup", ])

# Node-level table for one graph: vertex name, in-degree and out-degree, plus
# (when `triangles = TRUE`) the number of triangles each vertex is part of.
degree_table <- function(graph, triangles = FALSE) {
  node_table <- data.frame(
    node = V(graph)$name,
    indegree = igraph::degree(graph, mode = 'in'),
    outdegree = igraph::degree(graph, mode = 'out')
  )
  if (triangles) {
    node_table$triangles <- igraph::count_triangles(graph)
  }
  node_table
}

# Share of nodes in a table that belong to at least one triangle.
share_in_triangle <- function(node_table) {
  sum(node_table$triangles > 0) / nrow(node_table)
}

# For the full graph and the graph without meetup only the degrees are
# computed. Earlier drafts of these two tables also listed betweenness,
# eigenvector centrality, closeness, 2-/3-step reach and triangle counts;
# those columns were commented out and stay disabled here.
NU.df <- degree_table(NU.g)

NU_without_meetup.df <- degree_table(NU_without_meetup.g)

# The single-platform graphs additionally get the triangle count.
NU_blog.df <- degree_table(NU_blog.g, triangles = TRUE)

NU_forum.df <- degree_table(NU_forum.g, triangles = TRUE)

NU_facebook.df <- degree_table(NU_facebook.g, triangles = TRUE)

# Printed for inspection: share of nodes in at least one triangle, by platform.
share_in_triangle(NU_blog.df)

share_in_triangle(NU_forum.df)

share_in_triangle(NU_facebook.df)

# Long table with one row per edge endpoint: every edge of `graph` contributes
# its first endpoint and its second endpoint, each tagged with the edge's
# `platform` attribute.
edge_endpoints_by_platform <- function(graph) {
  endpoints <- igraph::get.edgelist(graph)
  edge_platform <- igraph::get.edge.attribute(graph, "platform")
  bind_rows(
    data.frame(node = endpoints[, 1], platform = edge_platform),
    data.frame(node = endpoints[, 2], platform = edge_platform)
  )
}

# Per node: total number of edge endpoints (`n`) and the number falling on
# each platform.
NU_node_platform <-
  edge_endpoints_by_platform(NU.g) %>%
  dplyr::group_by(node) %>%
  dplyr::summarise(
    n = n(),
    facebook = sum(platform == "facebook"),
    forum = sum(platform == "forum"),
    blog = sum(platform == "blog"),
    meetup = sum(platform == "meetup")
  )

# Same counts on the graph that excludes meetup edges (no `meetup` column).
NU_node_platform_without_meetup <-
  edge_endpoints_by_platform(NU_without_meetup.g) %>%
  dplyr::group_by(node) %>%
  dplyr::summarise(
    n = n(),
    facebook = sum(platform == "facebook"),
    forum = sum(platform == "forum"),
    blog = sum(platform == "blog")
  )

# Probabilities of next activity

# Conditional shares for users with at least `i` edges on one platform.
#
# For every threshold i in 1..max_edges, restricted to the rows where `keep`
# is TRUE, this computes
#   * column `from`:      share of nodes with >= i edges on `from` that also
#                         have >= i + 1 edges on `from`;
#   * one column per entry of `others`: share of nodes with >= i edges on
#                         `from` that have at least one edge on that platform.
#
# Arguments:
#   node_platform  table with one row per node and one count column per
#                  platform (as built in NU_node_platform).
#   from           name of the platform column the threshold is applied to.
#   others         names of the remaining platform columns, in the order the
#                  output columns should appear.
#   keep           logical mask over the rows of `node_platform` (or a single
#                  TRUE to use all rows).
#   max_edges      largest threshold evaluated.
#
# Returns a data.frame with columns `i`, `from`, then `others`, one row per
# threshold. The column order matters: the plotting code selects columns by
# range (e.g. blog:meetup). When no node reaches a threshold the denominator
# is 0 and the shares for that row are NaN.
#
# This replaces eight copy-pasted loops that grew a data frame with rbind()
# on every iteration; the numbers produced are the same.
platform_transition_probabilities <- function(node_platform, from, others,
                                              keep = TRUE, max_edges = 50L) {
  from_edges <- node_platform[[from]]
  thresholds <- seq_len(max_edges)
  n_columns <- length(others) + 1L

  shares_at <- function(i) {
    reached_i <- from_edges >= i & keep
    n_reached_i <- sum(reached_i)
    stays <- sum(from_edges >= i + 1 & keep) / n_reached_i
    appears <- vapply(
      others,
      function(platform) {
        sum(node_platform[[platform]] > 0 & reached_i) / n_reached_i
      },
      numeric(1)
    )
    c(stays, appears)
  }

  # vapply() returns one column per threshold; refill it row-wise so that
  # each row of the result corresponds to one threshold.
  shares <- matrix(
    vapply(thresholds, shares_at, numeric(n_columns)),
    nrow = length(thresholds),
    ncol = n_columns,
    byrow = TRUE,
    dimnames = list(NULL, c(from, others))
  )

  data.frame(i = thresholds, shares)
}

# All users. These are computed before the candidate data is joined onto
# NU_node_platform, exactly as in the original statement order.
from_facebook_probabilities.df <-
  platform_transition_probabilities(NU_node_platform, "facebook",
                                    c("blog", "forum", "meetup"))

from_blog_probabilities.df <-
  platform_transition_probabilities(NU_node_platform, "blog",
                                    c("facebook", "forum", "meetup"))

from_forum_probabilities.df <-
  platform_transition_probabilities(NU_node_platform, "forum",
                                    c("blog", "facebook", "meetup"))

from_meetup_probabilities.df <-
  platform_transition_probabilities(NU_node_platform, "meetup",
                                    c("blog", "forum", "facebook"))


# Candidates

# NOTE(review): the original trailing comment on this line named
# datasetParlamentarie2012networks_random.csv -- presumably an earlier input
# file; confirm before removing the reference.
df <-
  read.csv("data/processed/pwdprotect_candidates_dummies_2.csv", header = TRUE)

## Candidate analysis

df$district <- factor(df$district)

# Join key: "<name> <surname>", matched against the graph's node names.
df$node <-
  sprintf("%s %s", df$name, df$surname)

# Attach candidate attributes, then the degrees measured without meetup.
# Nodes that are not candidates end up with NA in `district`.
NU_node_platform <-
  NU_node_platform %>%
  dplyr::left_join(df, by = "node") %>%
  dplyr::left_join(NU_without_meetup.df, by = "node")

## TMP
NU_node_platform <-
  NU_node_platform %>%
  dplyr::left_join(read.csv("data/processed/pwdprotect_NU_centralities.csv") %>%
                     dplyr::select(node = user_name,
                                   evcent = TOT_eigenvector_centrality),
                   by = "node")

NU_node_platform$in_facebook <- NU_node_platform$facebook > 0

# Probabilities of next activity (only candidates)

# A node counts as a candidate when the join above gave it a district.
is_candidate <- !is.na(NU_node_platform$district)

from_facebook_probabilities_only_candidates.df <-
  platform_transition_probabilities(NU_node_platform, "facebook",
                                    c("blog", "forum", "meetup"),
                                    keep = is_candidate)

from_blog_probabilities_only_candidates.df <-
  platform_transition_probabilities(NU_node_platform, "blog",
                                    c("facebook", "forum", "meetup"),
                                    keep = is_candidate)

from_forum_probabilities_only_candidates.df <-
  platform_transition_probabilities(NU_node_platform, "forum",
                                    c("blog", "facebook", "meetup"),
                                    keep = is_candidate)

from_meetup_probabilities_only_candidates.df <-
  platform_transition_probabilities(NU_node_platform, "meetup",
                                    c("blog", "forum", "facebook"),
                                    keep = is_candidate)




# 

# Online figure 1: for each platform, the share of users with at least `i`
# edges on that platform who also appear on each other platform; one panel per
# platform for all users and one per platform for candidates only.
#
# Each probability table is reshaped to long format (columns `name`, `value`)
# over its "other platform" columns; the platform's own column is left out of
# the pivot and therefore not drawn. `from` is the panel label.
online_figure_1_data <- dplyr::bind_rows(
  from_facebook_probabilities.df %>%
    tidyr::pivot_longer(cols = blog:meetup) %>%
    dplyr::mutate(from = sprintf("Facebook (users = %s, edges = %s)",
                                 format(vcount(NU_facebook.g), big.mark=","),
                                 format(ecount(NU_facebook.g), big.mark=","))),
  from_blog_probabilities.df %>%
    tidyr::pivot_longer(cols = facebook:meetup) %>%
    dplyr::mutate(from = sprintf("Blog (users = %s, edges = %s)",
                                 format(vcount(NU_blog.g), big.mark=","),
                                 format(ecount(NU_blog.g), big.mark=","))),
  from_forum_probabilities.df %>%
    tidyr::pivot_longer(cols = blog:meetup) %>%
    dplyr::mutate(from = sprintf("Forum (users = %s, edges = %s)",
                                 format(vcount(NU_forum.g), big.mark=","),
                                 format(ecount(NU_forum.g), big.mark=","))),
  from_meetup_probabilities.df %>%
    tidyr::pivot_longer(cols = blog:facebook) %>%
    dplyr::mutate(from = sprintf("Meetup (users = %s, edges = %s)",
                                 format(vcount(NU_meetup.g), big.mark=","),
                                 format(ecount(NU_meetup.g), big.mark=","))),
  from_facebook_probabilities_only_candidates.df %>%
    tidyr::pivot_longer(cols = blog:meetup) %>%
    dplyr::mutate(from = sprintf("Facebook (candidates = %s)",
                                 format(sum(!is.na(NU_node_platform$district) &
                                              NU_node_platform$facebook>0),
                                        big.mark=","))),
  from_blog_probabilities_only_candidates.df %>%
    tidyr::pivot_longer(cols = facebook:meetup) %>%
    dplyr::mutate(from = sprintf("Blog (candidates = %s)",
                                 format(sum(!is.na(NU_node_platform$district) &
                                              NU_node_platform$blog>0),
                                        big.mark=","))),
  from_forum_probabilities_only_candidates.df %>%
    tidyr::pivot_longer(cols = blog:meetup) %>%
    dplyr::mutate(from = sprintf("Forum (candidates = %s)",
                                 format(sum(!is.na(NU_node_platform$district) &
                                              NU_node_platform$forum>0),
                                        big.mark=","))),
  from_meetup_probabilities_only_candidates.df %>%
    tidyr::pivot_longer(cols = blog:facebook) %>%
    dplyr::mutate(from = sprintf("Meetup (candidates = %s)",
                                 format(sum(!is.na(NU_node_platform$district) &
                                              NU_node_platform$meetup>0),
                                        big.mark=",")))
)

online_figure_1 <-
  ggplot(online_figure_1_data, aes(x = i, y = value, colour = name)) +
  geom_line() +
  labs(x = 'edges within platform',
       colour = "appears\non other platform") +
  scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
  facet_wrap('from', ncol = 4) +
  # theme_bw() is a complete theme and resets every theme element, so it has
  # to come BEFORE the legend tweak; in the original order the legend position
  # was silently reset to the default.
  theme_bw() +
  theme(legend.position = 'bottom')

ggsave(filename = "output/figures/online_figure_1.png",
       plot = online_figure_1,
       width = 14, height = 6)
